Metabarcoding Singapore
ASV from Dada2
Metabarcoding Singapore
ASV from Dada2
- 1 Aim
- 2 Initialize
- 3 Read the data
- 4 Assignment of eukaryotic ASVs based on PR2 database
- 5 Process BLAST file
- 6 Map
- 7 Phyloseq analysis
- 7.1 Create phyloseq files for euk after filtering the data
- 7.2 Break up into photosynthetic and non-photosynthetic
- 7.3 Create phyloseq files for proks
- 7.4 Normalize number of reads in each sample using median sequencing depth.
- 7.5 Phyloseq files for abundant taxa
- 7.6 Create a list for the auto and hetero phyloseq files
- 7.7 Create tabular files for other plots (only for eukaryotes)
- 7.8 Treemaps at division and class levels
- 7.9 Most abundant species
- 7.10 Bar plot of divisions per station
- 7.11 Bar plot of class per station
- 7.12 Compare by Straight, Site, Moonsoon (abundant OTUs only)
- 7.13 Main taxa
- 7.14 Heatmaps
- 7.15 NMDS
- 7.16 Network analysis
1 Aim
- Assign and analyze eukaryotes for Singapore metabarcoding data (ASV assigned with dada2 as implemented on Mothur).
- Do some analyzes with the prokaryotes too…
2 Initialize
This file defines all the necessary libraries and variables
source('Metabarcoding Singapore_init.R', echo=FALSE)3 Read the data
3.1 File names
full_path_data <- function(file_name) {
str_c("../qiime/2018-09-06_dada2/", file_name)
}
taxo_file <- full_path_data("taxonomy.tsv")
otu_file <- full_path_data("feature-table_unrarefied.tsv")
sequence_file <- full_path_data("ref_sequences_unrarefied.fasta")
metadata_file <- "../metadata/Singapore_metadata.xlsx"
sequence_file_euk <- full_path_data("ASV_unrarefied_euk.fasta")
sequence_file_mamiello <- full_path_data("ASV_Mamiello.fasta")
dada2_taxo_file_euk <- full_path_data("ASV_unrarefied_euk.dada2.taxo")
dada2_boot_file_euk <- full_path_data("ASV_unrarefied_euk.dada2.boot")
otu_table_final_file <- full_path_data("ASV_final.tsv")
blast_file <- full_path_data("ASV_unrarefied_euk.blast.tsv")3.2 Read the files
- The dada2 treatment has already removed the forward and reverse primers, so no need to remove them
- Work with the unrarefied data
# Read the sample and metadata tables
sample_table <- read_excel(metadata_file, sheet="samples", range="A1:D89")
metadata_table <- read_excel(metadata_file, sheet="metadata", na=c("ND", ""))
station_table <- read_excel(metadata_file, sheet="stations", na=c("ND", ""))
sample_table <- left_join(sample_table, metadata_table) %>%
mutate(sample_label = str_c(strait_label,location_label,
monsoon,sprintf("%03d",day_number),
sep="_"))
# Read the taxonomy table
taxo_table <- read_tsv(taxo_file)
# Clean up the taxonomy
taxo_table <- taxo_table %>%
mutate(taxo_clean = str_replace_all(Taxon, "D_[0-9]+__","")) %>%
separate(col=taxo_clean, into=str_c("taxo", c(1:7)), sep=";") %>%
rename(otu_name = `Feature ID`)
# Read the otu table
otu_table <- read_tsv(otu_file, skip=1) %>% # Jump the first line
rename(otu_name = `#OTU ID`) %>%
mutate(otu_id = str_c("otu_", sprintf("%04d",row_number())))
# Read the sequences
otu_sequences <- readAAStringSet(sequence_file)
otu_sequences.df <- data.frame (otu_name=names(otu_sequences),sequence=as.character(otu_sequences))
# Remove the primers - Not necessary because the primers have been removed
# fwd_length = 20
# rev_length = 15
# otu_sequences.df <- otu_sequences.df %>%
# separate (col=names, into=c("otu_id_qiime", "otu_rep_seq"), sep=" ") %>%
# mutate (sequence = str_sub(sequence, start=fwd_length+1, end = - rev_length - 1))
otu_table <- taxo_table %>%
left_join(otu_table) %>%
left_join(otu_sequences.df) %>%
arrange(otu_id)
# Write a fasta file for blast with all taxonomy roups
# otu_sequences <- otu_table %>% transmute(sequence=sequence, seq_name=otu_id)
# fasta_write(otu_sequences, file_name="../qiime/otu_rep_98_all.fasta", compress = FALSE, taxo_include = FALSE) 3.3 Only keep the eukaryotes in the OTU file
otu_table_euk <- otu_table %>% filter(str_detect(Taxon, "Eukaryota"))
# Write the fasta file file
otu_sequences_euk <- otu_table_euk %>% transmute(sequence = sequence, seq_name = otu_id)
fasta_write(otu_sequences_euk, file_name = sequence_file_euk, compress = FALSE,
taxo_include = FALSE)[1] TRUE
4 Assignment of eukaryotic ASVs based on PR2 database
4.1 Use dada2 to reassign to PR2
dada2_assign(seq_file_name = sequence_file_euk)4.2 Read the PR2 assignement and merge with initial otu table
otu_euk_pr2 <- read_tsv(dada2_taxo_file_euk)
otu_euk_pr2_boot <- read_tsv(dada2_boot_file_euk) %>% rename_all(funs(str_c(.,
"_boot"))) %>% rename(seq_name = seq_name_boot)
otu_euk_pr2 <- left_join(otu_euk_pr2, otu_euk_pr2_boot) %>% rename(otu_id = seq_name)
otu_table_final <- left_join(otu_table, otu_euk_pr2) %>% select(otu_id, otu_name,
taxo1:taxo7, Taxon, kingdom:species_boot, matches("EC|PR|RM|SBW|STJ"), sequence)
write_tsv(otu_table_final, otu_table_final_file, na = "")4.3 Export fasta file with taxonomy for Mamiellophyceae
otu_mamiello <- otu_table_final %>% filter(class == "Mamiellophyceae") %>% select(sequence = sequence,
seq_name = otu_id, supergroup:species)
fasta_write(otu_mamiello, file_name = sequence_file_mamiello, compress = FALSE,
taxo_include = TRUE)[1] TRUE
5 Process BLAST file
BLAST is performed on Roscoff ABIMS server
blast_18S_reformat(blast_file)6 Map
Use leaflet
lng_center = mean(station_table$longitude)
lat_center = mean(station_table$latitude)
map <- leaflet(width = 1000, height = 1000) %>% addTiles() %>% setView(lng = lng_center,
lat = lat_center, zoom = 11) %>% addCircleMarkers(data = station_table,
lat = ~latitude, lng = ~longitude, radius = 5, label = ~location, labelOptions = labelOptions(textsize = "10px",
noHide = T), clusterOptions = markerClusterOptions())
map7 Phyloseq analysis
7.1 Create phyloseq files for euk after filtering the data
Filter the euk data to remove the low bootstraps values (threshold : bootstrap > 90% at the supergroup level) and create a phyloseq file
Note the bootstrap threshold had to be higher for 98% compared to 97% (90% vs 65%). For ASV the same bootstrap has been used
otu_table_euk_final <- otu_table_final %>% filter(supergroup_boot > 90)
otu_mat <- otu_table_euk_final %>% select(otu = otu_id, matches("EC|PR|RM|SBW|STJ"),
-species, -species_boot)
tax_mat <- otu_table_euk_final %>% select(otu = otu_id, kingdom:species)
samples_df <- sample_table %>% rename(sample = sample_id)
row.names(otu_mat) <- otu_mat$otu
otu_mat <- otu_mat %>% select(-otu)
row.names(tax_mat) <- tax_mat$otu
tax_mat <- tax_mat %>% select(-otu)
row.names(samples_df) <- samples_df$sample
samples_df <- samples_df %>% select(-sample)
otu_mat <- as.matrix(otu_mat)
tax_mat <- as.matrix(tax_mat)
OTU = otu_table(otu_mat, taxa_are_rows = TRUE)
TAX = tax_table(tax_mat)
samples = sample_data(samples_df)
ps_euk <- phyloseq(OTU, TAX, samples)
ps_euk <- subset_samples(ps_euk, sequence_quality == "good")7.2 Break up into photosynthetic and non-photosynthetic
- Opisthokonta (Metazoa, Fungi) are removed
ps_euk <- subset_taxa(ps_euk, !(supergroup %in% c("Opisthokonta")))
cat("\nPhyloseq Eukaryotes \n========== \n")
Phyloseq Eukaryotes
==========
ps_eukphyloseq-class experiment-level object
otu_table() OTU Table: [ 663 taxa and 81 samples ]
sample_data() Sample Data: [ 81 samples by 22 sample variables ]
tax_table() Taxonomy Table: [ 663 taxa by 8 taxonomic ranks ]
ps_photo <- subset_taxa(ps_euk, (division %in% c("Chlorophyta", "Cryptophyta",
"Rhodophyta", "Haptophyta", "Ochrophyta")) | ((division == "Dinoflagellata") &
(class != "Syndiniales")) | (class == "Filosa-Chlorarachnea"))
cat("\nPhyloseq Photosynthetic Eukaryotes \n========== \n")
Phyloseq Photosynthetic Eukaryotes
==========
ps_photophyloseq-class experiment-level object
otu_table() OTU Table: [ 267 taxa and 81 samples ]
sample_data() Sample Data: [ 81 samples by 22 sample variables ]
tax_table() Taxonomy Table: [ 267 taxa by 8 taxonomic ranks ]
ps_hetero <- subset_taxa(ps_euk, !(division %in% c("Chlorophyta", "Cryptophyta",
"Rhodophyta", "Haptophyta", "Ochrophyta")) & !((division == "Dinoflagellata") &
!(class == "Syndiniales")) & !(class == "Filosa-Chlorarachnea"))
cat("\nPhyloseq Heterotrophic Eukaryotes \n========== \n")
Phyloseq Heterotrophic Eukaryotes
==========
ps_heterophyloseq-class experiment-level object
otu_table() OTU Table: [ 396 taxa and 81 samples ]
sample_data() Sample Data: [ 81 samples by 22 sample variables ]
tax_table() Taxonomy Table: [ 396 taxa by 8 taxonomic ranks ]
7.3 Create phyloseq files for proks
otu_table_prok <- otu_table %>% filter(taxo1 %in% c("Bacteria", "Archaea"))
otu_mat <- otu_table_prok %>% select(otu = otu_id, matches("EC|PR|RM|SBW|STJ"))
tax_mat <- otu_table_prok %>% select(otu = otu_id, taxo1:taxo7) %>% rename(kingdom = taxo1,
supergroup = taxo2, division = taxo3, class = taxo4, order = taxo5, family = taxo6,
genus = taxo7) %>% mutate(species = NA)
samples_df <- sample_table %>% rename(sample = sample_id)
row.names(otu_mat) <- otu_mat$otu
otu_mat <- otu_mat %>% select(-otu)
row.names(tax_mat) <- tax_mat$otu
tax_mat <- tax_mat %>% select(-otu)
row.names(samples_df) <- samples_df$sample
samples_df <- samples_df %>% select(-sample)
otu_mat <- as.matrix(otu_mat)
tax_mat <- as.matrix(tax_mat)
OTU = otu_table(otu_mat, taxa_are_rows = TRUE)
TAX = tax_table(tax_mat)
samples = sample_data(samples_df)
ps_prok <- phyloseq(OTU, TAX, samples)
ps_prok <- subset_samples(ps_prok, sequence_quality == "good")
cat("\nPhyloseq Prokaryotes \n========== \n")
Phyloseq Prokaryotes
==========
ps_heterophyloseq-class experiment-level object
otu_table() OTU Table: [ 396 taxa and 81 samples ]
sample_data() Sample Data: [ 81 samples by 22 sample variables ]
tax_table() Taxonomy Table: [ 396 taxa by 8 taxonomic ranks ]
7.4 Normalize number of reads in each sample using median sequencing depth.
- ! If there no cells do not transform, just set column to 0
function(x, t=total_hetero) (if(sum(x) > 0){ t * (x / sum(x))} else {x})
# First define a function to normalize
ps_normalize_median <- function(ps, title) {
ps_median = median(sample_sums(ps))
cat(sprintf("\nThe median number of reads used for normalization of %s is %.0f",
title, ps_median))
normalize_median = function(x, t = ps_median) (if (sum(x) > 0) {
t * (x/sum(x))
} else {
x
})
ps = transform_sample_counts(ps, normalize_median)
cat(str_c("\nPhyloseq ", title, "\n========== \n"))
print(ps)
}
# Apply to all the phyloseq files
ps_euk = ps_normalize_median(ps_euk, "eukaryotes (auto+hetero)")
The median number of reads used for normalization of eukaryotes (auto+hetero) is 4735
Phyloseq eukaryotes (auto+hetero)
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 663 taxa and 81 samples ]
sample_data() Sample Data: [ 81 samples by 22 sample variables ]
tax_table() Taxonomy Table: [ 663 taxa by 8 taxonomic ranks ]
ps_photo = ps_normalize_median(ps_photo, "eukaryotes autotrophs")
The median number of reads used for normalization of eukaryotes autotrophs is 3063
Phyloseq eukaryotes autotrophs
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 267 taxa and 81 samples ]
sample_data() Sample Data: [ 81 samples by 22 sample variables ]
tax_table() Taxonomy Table: [ 267 taxa by 8 taxonomic ranks ]
ps_hetero = ps_normalize_median(ps_hetero, "eukaryotes heterotrophs")
The median number of reads used for normalization of eukaryotes heterotrophs is 983
Phyloseq eukaryotes heterotrophs
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 396 taxa and 81 samples ]
sample_data() Sample Data: [ 81 samples by 22 sample variables ]
tax_table() Taxonomy Table: [ 396 taxa by 8 taxonomic ranks ]
ps_prok = ps_normalize_median(ps_prok, "prokaryotes")
The median number of reads used for normalization of prokaryotes is 54273
Phyloseq prokaryotes
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 2079 taxa and 81 samples ]
sample_data() Sample Data: [ 81 samples by 22 sample variables ]
tax_table() Taxonomy Table: [ 2079 taxa by 8 taxonomic ranks ]
7.5 Phyloseq files for abundant taxa
- Remove taxa that are < 0.10 (euks) and <0.05 (proks) in any given sample
- Normalize again…
ps_abundant <- function(ps, contrib_min = 0.1, title) {
total_per_sample <- max(sample_sums(ps))
ps <- filter_taxa(ps, function(x) sum(x > total_per_sample * contrib_min) >
0, TRUE)
ps <- ps_normalize_median(ps, title)
}
cat("Remove taxa in low abundance \n\n")Remove taxa in low abundance
ps_euk_abundant = ps_abundant(ps_euk, contrib_min = 0.1, "eukaryotes (auto+hetero)")
The median number of reads used for normalization of eukaryotes (auto+hetero) is 3359
Phyloseq eukaryotes (auto+hetero)
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 60 taxa and 81 samples ]
sample_data() Sample Data: [ 81 samples by 22 sample variables ]
tax_table() Taxonomy Table: [ 60 taxa by 8 taxonomic ranks ]
ps_photo_abundant = ps_abundant(ps_photo, contrib_min = 0.1, "eukaryotes autotrophs")
The median number of reads used for normalization of eukaryotes autotrophs is 2801
Phyloseq eukaryotes autotrophs
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 61 taxa and 81 samples ]
sample_data() Sample Data: [ 81 samples by 22 sample variables ]
tax_table() Taxonomy Table: [ 61 taxa by 8 taxonomic ranks ]
ps_hetero_abundant = ps_abundant(ps_hetero, contrib_min = 0.1, "eukaryotes heterotrophs")
The median number of reads used for normalization of eukaryotes heterotrophs is 687
Phyloseq eukaryotes heterotrophs
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 81 taxa and 81 samples ]
sample_data() Sample Data: [ 81 samples by 22 sample variables ]
tax_table() Taxonomy Table: [ 81 taxa by 8 taxonomic ranks ]
ps_prok_abundant = ps_abundant(ps_prok, contrib_min = 0.05, "prokaryotes")
The median number of reads used for normalization of prokaryotes is 24353
Phyloseq prokaryotes
==========
phyloseq-class experiment-level object
otu_table() OTU Table: [ 44 taxa and 81 samples ]
sample_data() Sample Data: [ 81 samples by 22 sample variables ]
tax_table() Taxonomy Table: [ 44 taxa by 8 taxonomic ranks ]
7.6 Create a list for the auto and hetero phyloseq files
ps_list <- list(ps = c(ps_prok, ps_euk, ps_photo), title = c("Prokaryotes - all OTUs",
"Eukaryotes - Auto and Hetero - all OTUs", "Eukaryotes - Autotrophs - all OTUs"))
ps_list_abundant <- list(ps = c(ps_prok_abundant, ps_euk_abundant, ps_photo_abundant),
title = c("Prokaryotes - abundant OTUs (> 5%)", "Eukaryotes - Auto + Hetero - abundant OTUs (> 10%)",
"Eukaryotes - Autotrophs - abundant OTUs (> 10%)"))7.7 Create tabular files for other plots (only for eukaryotes)
ps_to_long <- function(ps) {
otu_df <- data.frame(otu_table(ps)) %>% rownames_to_column(var = "otu_id")
taxo_df <- data.frame(tax_table(ps)) %>% rownames_to_column(var = "otu_id")
otu_df <- left_join(taxo_df, otu_df)
otu_df <- gather(otu_df, "sample", "n_seq", contains("X")) # All samples contain X
}
long_euk <- ps_to_long(ps_euk)
long_photo <- ps_to_long(ps_photo)
long_euk_abundant <- ps_to_long(ps_euk_abundant)
long_photo_abundant <- ps_to_long(ps_photo_abundant)7.8 Treemaps at division and class levels
treemap_dv(long_euk, c("division", "class"), "n_seq", "All euks")treemap_dv(long_photo, c("division", "class"), "n_seq", "Photo euks")7.9 Most abundant species
long_euk_species <- long_euk %>% mutate(species_label = str_c(class, species,
sep = "-")) %>% group_by(class, species, species_label) %>% summarize(n_seq = sum(n_seq)) %>%
arrange(desc(n_seq)) %>% ungroup()
ggplot(top_n(long_euk_species, 30, n_seq)) + geom_col(aes(x = reorder(species,
n_seq), y = n_seq, fill = class)) + coord_flip() + xlab("Species") + ylab("Number of species")7.10 Bar plot of divisions per station
Note: some stations are completely missing heterotrophs (Only Opistokonta)
for (i in 1:3) {
p <- plot_bar(ps_list$ps[[i]], x = "sample_label", fill = "division") +
geom_bar(aes(color = division, fill = division), stat = "identity",
position = "stack") + ggtitle(str_c("Division level - ", ps_list$title[[i]])) +
theme(axis.text.y = element_text(size = 10)) + theme(axis.text.x = element_text(angle = 0,
hjust = 0.5)) + coord_flip()
print(p)
}7.11 Bar plot of class per station
Only consider the abundant taxa
for (i in 1:3) {
p <- plot_bar(ps_list_abundant$ps[[i]], x = "sample_label", fill = "class") +
geom_bar(aes(color = class, fill = class), stat = "identity", position = "stack") +
ggtitle(str_c("Class level - ", ps_list_abundant$title[[i]])) + theme(axis.text.y = element_text(size = 10)) +
theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) + coord_flip()
print(p)
}7.12 Compare by Straight, Site, Moonsoon (abundant OTUs only)
for (factor in c("strait", "location", "monsoon")) {
for (i in 1:3) {
ps_aggregate <- merge_samples(ps_list_abundant$ps[[i]], factor)
ps_aggregate <- transform_sample_counts(ps_aggregate, function(x) 100 *
(x/sum(x)))
p <- plot_bar(ps_aggregate, fill = "division") + geom_col(aes(color = division,
fill = division)) + ggtitle(str_c(ps_list_abundant$title[[i]], " - ",
factor)) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5,
hjust = 1)) + ylab("%")
print(p)
}
}7.13 Main taxa
7.13.1 Main genera for different division of Eukaryotes (Autotrophs)
for (one_division in c("Chlorophyta", "Dinoflagellata", "Ochrophyta")) {
ps_subset <- subset_taxa(ps_photo_abundant, division %in% one_division)
p <- plot_bar(ps_subset, x = "genus", facet_grid = ~strait) + geom_col() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
ggtitle(str_c(one_division, " - Abundant OTUs")) + coord_flip()
print(p)
}7.13.2 Main species of Mamiellophyceae and Diatoms (Eukaryotes - Autrotrophs)
for (one_class in c("Mamiellophyceae", "Dinophyceae", "Bacillariophyta")) {
ps_subset <- subset_taxa(ps_photo_abundant, class %in% one_class)
p <- plot_bar(ps_subset, x = "species", facet_grid = ~strait) + geom_col() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
ggtitle(str_c(one_class, " - Abundant OTUs")) + coord_flip()
print(p)
}7.14 Heatmaps
7.14.1 Abundant OTUs
- Data are agglomarated at the genus level. Use function
tax_glom
for (i in c(1)) {
ps_heat <- tax_glom(ps_list_abundant$ps[[i]], taxrank = "family")
p <- plot_heatmap(ps_heat, method = "NMDS", distance = "bray", taxa.label = "family",
taxa.order = "division", sample.label = "sample_label", sample.order = "sample_label",
low = "beige", high = "red", na.value = "beige", title = ps_list_abundant$title[[i]])
print(p)
}for (i in 2:3) {
ps_heat <- tax_glom(ps_list_abundant$ps[[i]], taxrank = "genus")
p <- plot_heatmap(ps_heat, method = "NMDS", distance = "bray", taxa.label = "genus",
taxa.order = "division", sample.label = "sample_label", sample.order = "sample_label",
low = "beige", high = "red", na.value = "beige", title = ps_list_abundant$title[[i]])
print(p)
}7.14.2 Chlorophyta at species level
All ASVs considered (not only abundant)
ps_heat <- subset_taxa(ps_photo, division == "Chlorophyta")
ps_heat <- tax_glom(ps_heat, taxrank = "species")
p <- plot_heatmap(ps_heat, method = "NMDS", distance = "bray", taxa.label = "species",
taxa.order = "species", sample.label = "sample_label", sample.order = "sample_label",
low = "beige", high = "red", na.value = "beige", trans = scales::log10_trans(),
title = "Mamiellophyceae in Singapore")
print(p)7.14.3 Mamiello (Only Bathy, Ostreo and Micromonas) at genus level
All ASVs considered (not only abundant)
ps_heat <- subset_taxa(ps_photo, genus %in% c("Ostreococcus", "Bathycoccus",
"Micromonas"))
ps_heat <- tax_glom(ps_heat, taxrank = "genus")
p <- plot_heatmap(ps_heat, method = "NMDS", distance = "bray", taxa.label = "genus",
taxa.order = "genus", sample.label = "sample_label", sample.order = "sample_label",
low = "beige", high = "red", na.value = "beige", trans = scales::log_trans(10),
title = "Mamiellophyceae in Singapore")
print(p)7.15 NMDS
Sample removed because they were pulling the NMDS * PR2X16XS21 it has a single eukaryote (diatom bloom ) * RM13XS36 cause problem for bacteria * PR11XS25 cause problem for hetero euks * SBW11XS26 cause problem for hetero euks * SBW13XS37 cause problem for hetero euks * RM13XS36 cause problem for hetero euks
Define function
ps_nmds <- function(ps_list) {
for (i in 1:3) {
ps_nmds <- ps_list$ps[[i]]
# Remove samples with no reads
ps_nmds <- prune_samples(sample_sums(ps_nmds) > 0, ps_nmds)
# Remove samples that caused problems (1= prok, 2=euk, 3=euk auto)
if (i == 1)
{
ps_nmds <- prune_samples(!(sample_names(ps_nmds) %in% c("RM13XS36")),
ps_nmds)
} # Prokaryotes
if (i %in% c(2, 3))
{
ps_nmds <- prune_samples(!(sample_names(ps_nmds) %in% c("PR2X16SXS21")),
ps_nmds)
} # Eukaryotes
if (i == 4)
{
ps_nmds <- prune_samples(!(sample_names(ps_nmds) %in% c("PR11XS25",
"SBW11XS26", "SBW13XS37")), ps_nmds)
} # Heterotrophs not used
singa.ord <- ordinate(ps_nmds, "NMDS", "bray")
# Factor to move the labels
nudge <- singa.ord[["points"]] * 0.05
p <- plot_ordination(ps_nmds, singa.ord, type = "samples", color = "strait",
shape = "monsoon", title = ps_list$title[[i]]) + geom_point(size = 5) +
scale_color_manual(values = c("red", "green", "blue")) + scale_shape_manual(values = c(17,
18, 15, 16)) + geom_text(aes(label = location_label), nudge_x = nudge,
nudge_y = nudge, check_overlap = FALSE, size = 3)
theme_bw()
print(singa.ord)
print(p)
p <- plot_ordination(ps_nmds, singa.ord, type = "taxa", color = "supergroup",
shape = "supergroup", title = ps_list$title[[i]]) + geom_point(size = 3) +
theme_bw()
print(p)
}
}7.15.1 All OTUs
ps_nmds(ps_list)Square root transformation
Wisconsin double standardization
Run 0 stress 0.12
Run 1 stress 0.14
Run 2 stress 0.13
Run 3 stress 0.14
Run 4 stress 0.14
Run 5 stress 0.13
Run 6 stress 0.13
Run 7 stress 0.12
... New best solution
... Procrustes: rmse 0.047 max resid 0.14
Run 8 stress 0.14
Run 9 stress 0.13
Run 10 stress 0.14
Run 11 stress 0.13
Run 12 stress 0.12
Run 13 stress 0.13
Run 14 stress 0.13
Run 15 stress 0.14
Run 16 stress 0.14
Run 17 stress 0.14
Run 18 stress 0.13
Run 19 stress 0.14
Run 20 stress 0.14
*** No convergence -- monoMDS stopping criteria:
20: stress ratio > sratmax
Call:
metaMDS(comm = veganifyOTU(physeq), distance = distance)
global Multidimensional Scaling using monoMDS
Data: wisconsin(sqrt(veganifyOTU(physeq)))
Distance: bray
Dimensions: 2
Stress: 0.12
Stress type 1, weak ties
No convergent solutions - best solution after 20 tries
Scaling: centring, PC rotation, halfchange scaling
Species: expanded scores based on 'wisconsin(sqrt(veganifyOTU(physeq)))'
Square root transformation
Wisconsin double standardization
Run 0 stress 0.19
Run 1 stress 0.18
... New best solution
... Procrustes: rmse 0.059 max resid 0.28
Run 2 stress 0.2
Run 3 stress 0.19
Run 4 stress 0.19
Run 5 stress 0.19
Run 6 stress 0.19
Run 7 stress 0.2
Run 8 stress 0.19
Run 9 stress 0.19
Run 10 stress 0.19
Run 11 stress 0.18
Run 12 stress 0.19
Run 13 stress 0.18
Run 14 stress 0.19
Run 15 stress 0.18
Run 16 stress 0.19
Run 17 stress 0.18
Run 18 stress 0.19
Run 19 stress 0.19
Run 20 stress 0.19
*** No convergence -- monoMDS stopping criteria:
20: stress ratio > sratmax
Call:
metaMDS(comm = veganifyOTU(physeq), distance = distance)
global Multidimensional Scaling using monoMDS
Data: wisconsin(sqrt(veganifyOTU(physeq)))
Distance: bray
Dimensions: 2
Stress: 0.18
Stress type 1, weak ties
No convergent solutions - best solution after 20 tries
Scaling: centring, PC rotation, halfchange scaling
Species: expanded scores based on 'wisconsin(sqrt(veganifyOTU(physeq)))'
Square root transformation
Wisconsin double standardization
Run 0 stress 0.18
Run 1 stress 0.17
... New best solution
... Procrustes: rmse 0.063 max resid 0.25
Run 2 stress 0.18
Run 3 stress 0.18
Run 4 stress 0.17
... New best solution
... Procrustes: rmse 0.048 max resid 0.23
Run 5 stress 0.18
Run 6 stress 0.18
Run 7 stress 0.17
Run 8 stress 0.18
Run 9 stress 0.18
Run 10 stress 0.17
Run 11 stress 0.18
Run 12 stress 0.18
Run 13 stress 0.17
Run 14 stress 0.17
Run 15 stress 0.18
Run 16 stress 0.19
Run 17 stress 0.19
Run 18 stress 0.19
Run 19 stress 0.18
Run 20 stress 0.18
*** No convergence -- monoMDS stopping criteria:
20: stress ratio > sratmax
Call:
metaMDS(comm = veganifyOTU(physeq), distance = distance)
global Multidimensional Scaling using monoMDS
Data: wisconsin(sqrt(veganifyOTU(physeq)))
Distance: bray
Dimensions: 2
Stress: 0.17
Stress type 1, weak ties
No convergent solutions - best solution after 20 tries
Scaling: centring, PC rotation, halfchange scaling
Species: expanded scores based on 'wisconsin(sqrt(veganifyOTU(physeq)))'
7.15.2 Abundant OTUs
ps_nmds(ps_list_abundant)Square root transformation
Wisconsin double standardization
Run 0 stress 0.12
Run 1 stress 0.15
Run 2 stress 0.13
Run 3 stress 0.41
Run 4 stress 0.12
... Procrustes: rmse 4.7e-05 max resid 0.00034
... Similar to previous best
Run 5 stress 0.14
Run 6 stress 0.13
Run 7 stress 0.12
Run 8 stress 0.14
Run 9 stress 0.14
Run 10 stress 0.13
Run 11 stress 0.12
Run 12 stress 0.13
Run 13 stress 0.12
Run 14 stress 0.12
Run 15 stress 0.12
Run 16 stress 0.15
Run 17 stress 0.15
Run 18 stress 0.12
Run 19 stress 0.13
Run 20 stress 0.12
*** Solution reached
Call:
metaMDS(comm = veganifyOTU(physeq), distance = distance)
global Multidimensional Scaling using monoMDS
Data: wisconsin(sqrt(veganifyOTU(physeq)))
Distance: bray
Dimensions: 2
Stress: 0.12
Stress type 1, weak ties
Two convergent solutions found after 20 tries
Scaling: centring, PC rotation, halfchange scaling
Species: expanded scores based on 'wisconsin(sqrt(veganifyOTU(physeq)))'
Square root transformation
Wisconsin double standardization
Run 0 stress 0.17
Run 1 stress 0.17
Run 2 stress 0.17
Run 3 stress 0.17
Run 4 stress 0.17
Run 5 stress 0.17
Run 6 stress 0.18
Run 7 stress 0.18
Run 8 stress 0.18
Run 9 stress 0.17
... New best solution
... Procrustes: rmse 0.049 max resid 0.24
Run 10 stress 0.17
Run 11 stress 0.17
Run 12 stress 0.18
Run 13 stress 0.17
Run 14 stress 0.17
Run 15 stress 0.17
Run 16 stress 0.17
Run 17 stress 0.18
Run 18 stress 0.17
Run 19 stress 0.17
Run 20 stress 0.17
... Procrustes: rmse 0.052 max resid 0.23
*** No convergence -- monoMDS stopping criteria:
20: stress ratio > sratmax
Call:
metaMDS(comm = veganifyOTU(physeq), distance = distance)
global Multidimensional Scaling using monoMDS
Data: wisconsin(sqrt(veganifyOTU(physeq)))
Distance: bray
Dimensions: 2
Stress: 0.17
Stress type 1, weak ties
No convergent solutions - best solution after 20 tries
Scaling: centring, PC rotation, halfchange scaling
Species: expanded scores based on 'wisconsin(sqrt(veganifyOTU(physeq)))'
Square root transformation
Wisconsin double standardization
Run 0 stress 0.16
Run 1 stress 0.16
Run 2 stress 0.17
Run 3 stress 0.16
Run 4 stress 0.16
Run 5 stress 0.17
Run 6 stress 0.17
Run 7 stress 0.17
Run 8 stress 0.16
Run 9 stress 0.17
Run 10 stress 0.17
Run 11 stress 0.17
Run 12 stress 0.17
Run 13 stress 0.16
Run 14 stress 0.17
Run 15 stress 0.17
Run 16 stress 0.17
Run 17 stress 0.17
Run 18 stress 0.16
Run 19 stress 0.17
Run 20 stress 0.16
*** No convergence -- monoMDS stopping criteria:
20: stress ratio > sratmax
Call:
metaMDS(comm = veganifyOTU(physeq), distance = distance)
global Multidimensional Scaling using monoMDS
Data: wisconsin(sqrt(veganifyOTU(physeq)))
Distance: bray
Dimensions: 2
Stress: 0.16
Stress type 1, weak ties
No convergent solutions - best solution after 20 tries
Scaling: centring, PC rotation, halfchange scaling
Species: expanded scores based on 'wisconsin(sqrt(veganifyOTU(physeq)))'
7.16 Network analysis
for (i in 1:3) {
ps_nmds <- ps_list_abundant$ps[[i]]
# Remove samples with no reads
ps_nmds <- prune_samples(sample_sums(ps_nmds) > 0, ps_nmds)
# Remove samples that caused problems (1= prok, 2=euk, 3=euk auto)
if (i == 1)
{
ps_nmds <- prune_samples(!(sample_names(ps_nmds) %in% c("RM13XS36")),
ps_nmds)
} # Prokaryotes
if (i %in% c(2, 3))
{
ps_nmds <- prune_samples(!(sample_names(ps_nmds) %in% c("PR2X16SXS21")),
ps_nmds)
} # Eukaryotes
if (i == 4)
{
ps_nmds <- prune_samples(!(sample_names(ps_nmds) %in% c("PR11XS25",
"SBW11XS26", "SBW13XS37")), ps_nmds)
} # Heterotrophs not used
if (i > 1) {
p <- plot_net(ps_nmds, distance = "(A+B-2*J)/(A+B)", type = "taxa",
maxdist = 0.4, color = "class", point_label = "genus") + ggtitle(ps_list_abundant$title[[i]])
} else {
p <- plot_net(ps_nmds, distance = "(A+B-2*J)/(A+B)", type = "taxa",
maxdist = 0.4, color = "class", point_label = "family") + ggtitle(ps_list_abundant$title[[i]])
}
print(p)
}